import pandas as pd
import itertools
import random
from scipy.stats import bernoulli
import numpy as np
import copy
import traceback
import joblib


import os
import sys
sys.path.append("..")
sys.path.append("GerryFair")
import gerryfair
import gc


from util_functions_cbs_benchmarking import *
from multiaccuracy_funcs import * 

from conditional_bias_scan import ConditionalBiasScan
from cbs_preprocessor import CBSPreProcessor
from cbs_logger import CBSLogger
from yaml_funcs import YamlFunctions
from dataset_specific_funcs import DatasetSpecificFuncs
from sklearn import linear_model


import matplotlib.pyplot as plt
import time

from  scipy.stats import pearsonr

import multiprocessing as mp

# Timestamp suffix that names the output folders/files of this benchmark run.
timestr = time.strftime("_%Y%m%d_%H%M%S")

# Feature columns used by the benchmark.
cols_copy = ["Under 25", "Prior Offenses", "Race", "ChargeDegree", "Sex"]

# Read the data set and keep the feature columns only.
# Alternative input: "../../toy_datasets/noncorrelation_synthetic_compas.csv"
df = pd.read_csv("../../toy_datasets/COMPAS_v2.csv")[cols_copy]

# Independent deep copies of the feature-only frame: `df_copy` is the working
# frame, `deep_df_copy` is the untouched reference it is reset from.
df_copy = df.copy(deep=True)
deep_df_copy = df_copy.copy(deep=True)


######################## MODERATE VALUES ######################################
# Fixed simulation values.
sigma = .2          # presumably the noise of the predicted log-odds -- TODO confirm
sigma_true = .6     # noise for producing true log-odds
pr_subset = .5      # probability of selecting a feature value
num_affected = 2    # number of features to select
###############################################################################

# Shift of true log-odds for the protected class (+Delta) and the
# non-protected class (-Delta).
_Delta = 0

# Shift of true log-odds for the bias subset in the protected class (+delta)
# and the bias subset in the non-protected class (-delta).
_delta = 0


# Config file for CBS.
# Alternative config:
#   "../fsscan_yamls/fsscan_configs-CBS_benchmark-binary_sufficiency_scan_CV.yaml"
yaml_configs_path = "../fsscan_yamls/fsscan_configs-CBS_benchmark.yaml"

# Needed for the benchmark tests.
dataset, attributes, centered = None, None, True

# Needed for parallel processing.
active_workers = 0
completed_workers = 0
active_processes_list = []
active_worker_constant = 10


def _record_gerryfair_audits(run_info, df_p, recommendations, s_bias, group_ind):
    """Run the four GerryFair audits and store their metrics in ``run_info``.

    The audits are executed in the order all-features/FP, one-feature/FP,
    all-features/FN, one-feature/FN, and for each of them the keys
    ``gerryfair_<all|one>_features_<accuracy|precision|recall|
    fairness_violation_score>_<FP|FN>`` are written to ``run_info``.

    Parameters
    ----------
    run_info : dict
        Result record; mutated in place.
    df_p : pandas.DataFrame
        Frame handed to the attribute builders, to ``clean_dataset`` and to the
        metric helpers.
    recommendations
        Predictions passed to ``Auditor.audit``.
    s_bias
        The injected bias subset the detected group is compared against.
    group_ind
        Name of the column used by the one-protected-attribute audit and by
        the metric helpers.
    """
    # Positional arguments of clean_dataset; the frame itself is supplied
    # through the ``data`` / ``attributes_df`` keywords.
    dataset = None
    attributes = None
    centered = True
    outcome_column = 'ReoffendedWithinTwoYears'

    audit_plan = (("all", "FP"), ("one", "FP"), ("all", "FN"), ("one", "FN"))
    for scope, error_type in audit_plan:
        if scope == "all":
            attributes_df = create_attributes_data_all_protected(df_p, outcome_column)
        else:
            attributes_df = create_attributes_data_one_protected(df_p, outcome_column, group_ind)

        _, X_prime, y = gerryfair.clean.clean_dataset(
            dataset, attributes, centered, data=df_p, attributes_df=attributes_df)
        auditor = gerryfair.model.Auditor(X_prime, y, error_type)
        violated_group, fairness_violation, _ = auditor.audit(
            recommendations, under_estimation=True)

        prefix = "gerryfair_" + scope + "_features_"
        run_info[prefix + "accuracy_" + error_type] = compute_accuracy_gerryfair(
            df_p, violated_group, s_bias, group_ind)
        run_info[prefix + "precision_" + error_type] = compute_precision_gerryfair(
            df_p, violated_group, s_bias, group_ind)
        run_info[prefix + "recall_" + error_type] = compute_recall_gerryfair(
            df_p, violated_group, s_bias, group_ind)
        run_info[prefix + "fairness_violation_score_" + error_type] = fairness_violation


def _record_multiaccuracy_audits(run_info, df_p, predicted_log_odds, outcomes, s_bias, group_ind):
    """Run the three multiaccuracy variants and store their metrics in ``run_info``.

    For the key suffixes ``""``, ``"_org"`` and ``"_exp"`` (in that order) the
    keys ``multiaccuracy_<accuracy|precision|recall|score><suffix>`` are
    written to ``run_info``, which is mutated in place.
    """
    variants = (
        ("", run_multiaccuracy),
        ("_org", run_multiaccuracy_org),
        ("_exp", run_multiaccuracy_exp),
    )
    for suffix, audit in variants:
        res_data = audit(df_p, predicted_log_odds, outcomes, "FP")
        corr, ma_id = find_highest_corr(res_data)

        run_info["multiaccuracy_accuracy" + suffix] = compute_accuracy_gerryfair(
            df_p, list(ma_id), s_bias, group_ind)
        run_info["multiaccuracy_precision" + suffix] = compute_precision_gerryfair(
            df_p, list(ma_id), s_bias, group_ind)
        run_info["multiaccuracy_recall" + suffix] = compute_recall_gerryfair(
            df_p, list(ma_id), s_bias, group_ind)
        run_info["multiaccuracy_score" + suffix] = corr


def run_wrapper(run_info):
    """Run one conditional bias scan (plus benchmarks) and write its results to CSV.

    Besides its argument, the function reads the module-level names
    ``fsscan_configs``, ``cbs_logger`` and ``timestr``; they must be assigned
    before it is called.

    Parameters
    ----------
    run_info : dict
        Description of a single run. The keys read here are ``"scan_params"``
        (a dict with ``"scan_info"``, ``"dataset_yaml"``, ``"data"``,
        ``"p_bin_var"``, ``"tilde_probability_var"``, ``"df_t"`` and
        ``"experiment_name"``), ``"selected_bias_subset"``, ``"group_ind"``,
        ``"protected_class"``, ``"run_number"``, ``"mu"``, ``"sigma"`` and
        ``"varying_parameter"``.

    Returns
    -------
    None

    Side effects
    ------------
    * ``run_info`` is mutated: ``"scan_params"`` is removed and the scan
      settings and result metrics are added.
    * Progress information is printed to stdout.
    * A one-row CSV is written below
      ``Benchmark_results/_<timestr>/bias_results``; that folder must exist.
    """
    scan_params = run_info["scan_params"]
    scan = scan_params["scan_info"]
    dataset_yaml = scan_params["dataset_yaml"]
    data = scan_params["data"]
    p_bin_var = scan_params["p_bin_var"]
    tilde_probability_var = scan_params["tilde_probability_var"]
    df_t = scan_params["df_t"]
    experiment_name = scan_params["experiment_name"]

    s_bias = run_info["selected_bias_subset"]
    group_ind = run_info["group_ind"]
    key, _ = run_info["protected_class"]

    print("printing scan type:::")
    print(scan["scan_type"])
    print(str(run_info["run_number"]))
    print(str(run_info["mu"]))

    fsscan_params = fsscan_configs["fsscan_params"]

    cbs = ConditionalBiasScan(
        scan["protected_class"],
        scan["protected_value"],
        scan["combo"],
        scan["event"],
        scan["conditional_variable"],
        fsscan_params,
        scan["direction"],
        scan["feature_list"],
        scan["scan_type"],
        scan["scan_feature_list"],
        scan["threshold_probability"],
        scan["threshold_cutoff"],
    )
    results = cbs.run(dataset_yaml, data, p_bin_var, tilde_probability_var)

    # Positional arguments shared by both write_results calls below; only the
    # subset, score, parameter and key suffix differ between them.
    shared_log_args = (
        results["treatment"],
        results["treatment_events"],
        results["treatment_p_hat"],
        results["controls"],
        results["control_events"],
        results["control_conditional_var"],
        results["treatment_conditional_var"],
        results["dataset_yaml"],
        scan["protected_class"],
        scan["protected_value"],
        scan["combo"],
        scan["event"],
        scan["conditional_variable"],
        fsscan_params,
        scan["direction"],
        scan["feature_list"],
        scan["scan_type"],
        scan["scan_feature_list"],
    )

    # Statistics for the subset found by the scan.
    stats_dict = cbs_logger.write_results(
        results["best_subset"],
        results["best_score"],
        results["best_param"],
        *shared_log_args,
        "",
        add_scores=True,
        include_conditional_var_base_rates=True)

    # Statistics for the injected bias subset, logged with placeholder
    # score/parameter values and the "_for_s_bias" suffix.
    other_dict = cbs_logger.write_results(
        s_bias,
        -1000000000,
        np.inf,
        *shared_log_args,
        "_for_s_bias",
        add_scores=True,
        include_conditional_var_base_rates=True)

    # The scan inputs are not part of the result row.
    del run_info["scan_params"]

    # The backslash is escaped: "\h" is an invalid escape sequence and triggers
    # a SyntaxWarning on recent Python versions. The printed text is unchanged.
    print("coefficients used for variable of logistic regression used to produce \\hat p: ")
    print(results["p_hat_coefficient_mapping"])

    print("best subset found : " + str(results["best_subset"]))
    print("best score : " + str(results["best_score"]))
    print("param for best scoring subset : " + str(results["best_param"]))

    s_found_subset = results["best_subset"]
    print(s_found_subset)

    # Computed once and reused for both the log line and the result row.
    cbs_accuracy = compute_accuracy(df_t, s_bias, s_found_subset, group_ind)
    print("accuracy:")
    print(cbs_accuracy)

    run_info["combo"] = scan["combo"]
    run_info["event"] = scan["event"]
    run_info["experiment_name"] = experiment_name
    run_info["conditional_variable"] = scan["conditional_variable"]
    run_info["fsscan_params"] = fsscan_params
    run_info["direction"] = scan["direction"]
    run_info["feature_list"] = scan["feature_list"]
    run_info["scan_type"] = scan["scan_type"]
    run_info["scan_feature_list"] = scan["scan_feature_list"]
    run_info["threshold_probability"] = scan["threshold_probability"]
    run_info["threshold_cutoff"] = scan["threshold_cutoff"]

    run_info["best_subset"] = results["best_subset"]
    run_info["best_score"] = results["best_score"]
    run_info["best_param"] = results["best_param"]

    run_info["cbs_accuracy"] = cbs_accuracy
    run_info["cbs_precision"] = compute_precision(df_t, s_bias, results["best_subset"], group_ind)
    run_info["cbs_recall"] = compute_recall(df_t, s_bias, results["best_subset"], group_ind)
    run_info["cbs_param"] = results["best_param"]
    run_info["cbs_score"] = results["best_score"]
    run_info["p_hat_coefficient_mapping"] = results["p_hat_coefficient_mapping"]

    # Benchmark methods are only run for prediction separation scans.
    if "prediction_separation" in scan["scan_type"]:

        recommendations = df_t["predicted_probs"]
        df_p = df_t[['Under 25', 'Prior Offenses', 'Race', 'ChargeDegree', 'Sex', 'ReoffendedWithinTwoYears', group_ind]]
        # Drop the raw protected-class column named by run_info["protected_class"].
        del df_p[key]

        _record_gerryfair_audits(run_info, df_p, recommendations, s_bias, group_ind)

        print("Running multi-accuracy")

        # The multiaccuracy audits receive the outcomes separately from the frame.
        outcomes = df_p["ReoffendedWithinTwoYears"]
        del df_p["ReoffendedWithinTwoYears"]

        _record_multiaccuracy_audits(
            run_info, df_p, df_t["predicted_log_odds"], outcomes, s_bias, group_ind)

    # Merge the logger statistics into the row; later dicts win on key clashes.
    result_row = {**run_info, **stats_dict}
    result_row = {**result_row, **other_dict}

    file_name = "".join([
        "Benchmark_results/_", timestr, "/bias_results",
        "/_run_num_", str(result_row["run_number"]),
        "_", str(experiment_name),
        "_", str(result_row["varying_parameter"]),
        "_", scan["scan_type"],
        "_sigmapred_", str(result_row["sigma"]),
        "_mu_", str(result_row["mu"]),
        ".csv",
    ])
    pd.DataFrame([result_row]).to_csv(file_name)

    
# Create the output folders for this run.
# os.makedirs also creates the parent "Benchmark_results" directory when it is
# missing (os.mkdir would raise FileNotFoundError in that case); it still
# raises FileExistsError if this run's timestamped folder already exists.
folder_path = "Benchmark_results/" + "_" + timestr
folder_path_bias = folder_path + "/bias_results"
folder_path_org = folder_path + "/original_data_sets"
os.makedirs(folder_path)
os.mkdir(folder_path_bias)
os.mkdir(folder_path_org)

# Names of the simulations to run; each simulation section of the driver loop
# is guarded by a membership test against this list. Shorten the list
# (e.g. ["1A"] or ["1A", "1A_suff"]) to run only a subset.
simulations_to_run = ["1A", "1B", "1CD", "1E", "2A", "2B", "2CD", "2E", "3", "1A_suff", "1B_suff", "1CD_suff", "1E_suff","3_suff"]

# Passed as the last argument to
# pick_protected_class_bias_subset_no_filter_gaussian by the driver loop.
sigma_coef = .2

for run_number in range(0,100):

    unsuccessful = True
    run_infos = [] 
    print("running run number "+str(run_number))
    gc.collect()
    while (unsuccessful == True):
        print("trying again for "+str(run_number))
        df_copy = deep_df_copy.copy(deep = True)
        run_infos = [] 
        ######################## MODERATE VALUES ######################################
        ### setting fixed values
        sigma = .2

        # noise for producing true log-odds
        sigma_true = .6

        # probability of selecting a feature value
        pr_subset = .5

        # number of features to select
        num_affected =2

        ###############################################################################

        # shift of true log-odds for protected class (+Delta) and non-protected class (-Delta)
        _Delta = 0

        # shift of true log-odds for bias subset in protected class (+delta) and bias subset in non-protected class (-delta)
        _delta = 0
        
        sigma_coef =.2

        try:
            run_infos = [] 
            run_info = {}
            run_info["run_number"] = run_number

            df_t = df_copy.copy(deep = True)
            cols_t = copy.deepcopy(cols_copy)

            df_t, cols_t, key, key_value, group_ind, s_bias, len_p, len_c, true_log_odds, coefficient_map = pick_protected_class_bias_subset_no_filter_gaussian(df_t, cols_t, 150, num_affected, pr_subset, _Delta,sigma_true, sigma_coef)
            # copying outcomes over to COMPAS dataset
            df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
            df_copy_within_run = df_copy.copy(deep = True)

            #print("Protected class:"+ key+ " : " + str(key_value))
            #print("Biased subset: "+ str(s_bias))
            #print("Number of individuals in bias subset for protected class: " + str(len_p))
            #print("Number of individuals in bias subset for non-protected class: " + str(len_c))
            #print("Coefficients used to protected true log-odds: "+ str(coefficient_map))

            to_choose =  df_t[s_bias.keys()].isin(s_bias).all(axis=1).astype(int)
            df_t["in_bias_subset"] = to_choose

            run_info["protected_class"] = (key, key_value)
            #run_info["_Delta"] = _Delta
            #run_info["sigma_true"] = sigma_true

            run_info["coefficient_values"] = coefficient_map


            run_info["selected_bias_subset"] = s_bias
            run_info["group_ind"] = group_ind
            run_info["number_of_rows_protected_class_bias_subset"] = len_p
            run_info["number_of_rows_control_bias_subset"] = len_c

            #mu_values = [i/10 for i in list(range(0,41))]
            
            # for probability shifts
            mu_values = [i*.05 for i in list(range(0,21))] ### .05 increases in probability
            
            
            #mu_values = [0,1,4]
            sigma_predicts = [.20]
            mu_suff = 0

            df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+".csv")

            # DIFFERENCES IN INJECTED BIAS
            
            if "1A" in simulations_to_run:

                # 1A
                for sigma in sigma_predicts:
                    for mu in mu_values: 
                        #print("running for mu: " + str(mu))

                        # add bias -- overwritten to 0
                        add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, 0, sigma, group_ind)

                        run_info["_delta"] = _delta
                        run_info["mu"] = mu
                        run_info["mu_suff"] = mu_suff
                        run_info["sigma"] = sigma
                        run_info["sigma_true"] = sigma_true
                        run_info["_Delta"] = _Delta
                        run_info["mu_suff"] = 0
                        run_info["num_affected"] = num_affected
                        run_info["pr_subset"] = pr_subset

                        run_info["varying_parameter"] = mu

                        log_odds_to_prob(df_t,"predicted_log_odds", "predicted_probs")
                        
                        
                        ###### here for predicted probability increase
                        probs_shift(df_t, s_bias, group_ind, "predicted_probs", mu)
                        ###### here for probability increase

                        df_copy["tilde_p"] = df_t["predicted_probs"]
                        #df_t["predicted_probs_"+str(mu)] = df_t["predicted_probs"]

                        if (mu == 0):
                            df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1A_"+ str(mu)+"_sigma_predict_"+str(sigma)+"_.csv")
                            df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1A_"+ str(mu)+"_sigma_predict_"+str(sigma)+"_full_.csv")



                        yaml_funcs = YamlFunctions(yaml_configs_path)
                        fsscan_configs = yaml_funcs.run()

                        #setting up logger
                        cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                        # performing initial data preprocessing, in this case there is not any
                        data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                        data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                        # producing all scans in config file
                        scans = yaml_funcs.produce_scans(data, key, key_value)
                        for scan in scans:
                            #print(scan)

                            scan_params = {}
                            scan_params["scan_info"] = scan
                            scan_params["dataset_yaml"] = dataset_yaml
                            scan_params["data"] =  data.copy(deep =True)
                            scan_params["p_bin_var"] = p_bin_var
                            scan_params["tilde_probability_var"] = tilde_probability_var
                            scan_params["df_t"] = df_t.copy(deep = True)
                            scan_params["df_copy"] = df_copy.copy(deep = True)
                            scan_params["experiment_name"] = "1A"
                            run_info["scan_params"] = scan_params
                            run_info_deep_copy = copy.deepcopy(run_info)
                            run_infos.append(run_info_deep_copy)
            
            
            
            df_copy = deep_df_copy.copy(deep =True)
            mu_suff = 0
            
            mu = 0
            
            #mu_values_suff = [i/10 for i in list(range(0,41))]
            
            # for probability shifts
            mu_values_suff = [i*.05 for i in list(range(0,21))] ### .05 increases in probability
            
            sigma_predicts = [.20]

            df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+".csv")
            
            copy_log_odds_org = copy.deepcopy(list(df_t["true_log_odds"]))
            copy_reffonding_outcomes = copy.deepcopy(list(df_t["ReoffendedWithinTwoYears"]))

            # DIFFERENCES IN INJECTED BIAS
            
            if "1A_suff" in simulations_to_run:

                # 1A
                for sigma in sigma_predicts:
                    for mu_suff in mu_values_suff: 
                        #print("running for mu: " + str(mu))
                        
                        df_t["true_log_odds"] = copy_log_odds_org

                        # add bias
                        add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                        run_info["_delta"] = _delta
                        run_info["mu"] = mu
                        run_info["mu_suff"] = mu_suff
                        run_info["sigma"] = sigma
                        run_info["sigma_true"] = sigma_true
                        run_info["_Delta"] = _Delta
                        run_info["num_affected"] = num_affected
                        run_info["pr_subset"] = pr_subset

                        run_info["varying_parameter"] = mu_suff

                        log_odds_to_prob(df_t,"predicted_log_odds", "predicted_probs")

                        df_copy["tilde_p"] = df_t["predicted_probs"]
                        #df_t["predicted_probs_"+str(mu)] = df_t["predicted_probs"]
                        
                        add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, (-1*mu_suff), group_ind)
                        df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                        if (mu_suff == 0):
                            df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1A_suff_"+ str(mu_suff)+"_sigma_predict_"+str(sigma)+"_.csv")
                            df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1A_suff_"+ str(mu_suff)+"_sigma_predict_"+str(sigma)+"_full_.csv")



                        yaml_funcs = YamlFunctions(yaml_configs_path)
                        fsscan_configs = yaml_funcs.run()

                        #setting up logger
                        cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                        # performing initial data preprocessing, in this case there is not any
                        data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                        data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                        # producing all scans in config file
                        scans = yaml_funcs.produce_scans(data, key, key_value)
                        for scan in scans:
                            #print(scan)

                            scan_params = {}
                            scan_params["scan_info"] = scan
                            scan_params["dataset_yaml"] = dataset_yaml
                            scan_params["data"] =  data.copy(deep =True)
                            scan_params["p_bin_var"] = p_bin_var
                            scan_params["tilde_probability_var"] = tilde_probability_var
                            scan_params["df_t"] = df_t.copy(deep = True)
                            scan_params["df_copy"] = df_copy.copy(deep = True)
                            scan_params["experiment_name"] = "1A_suff"
                            run_info["scan_params"] = scan_params
                            run_info_deep_copy = copy.deepcopy(run_info)
                            run_infos.append(run_info_deep_copy)

            # 1B
            

            df_copy = df_copy_within_run.copy(deep =True)
            
            
            sigma_true = .6
            
            #mu = 1
            mu = .5 ### go back a change after 1A_sep is run
            
            
            mu_suff = 0
            sigmas  = [i/10 for i in list(range(0,21))]
            if "1B" in simulations_to_run:
                for sigma in sigmas:
                    #print("running for sigma: " + str(sigma))
                    
                    df_t["true_log_odds"] = copy_log_odds_org

                    # add bias
                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    run_info["_delta"] = _delta
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["sigma_true"] = sigma_true
                    run_info["_Delta"] = _Delta
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = sigma

                    log_odds_to_prob(df_t,"predicted_log_odds", "predicted_probs")
                    
                    ###### here for predicted probability increase
                    probs_shift(df_t, s_bias, group_ind, "predicted_probs", mu)
                    ###### here for probability increase

                    df_copy["tilde_p"] = df_t["predicted_probs"]
                    
                    

                    if ((sigma == 0) or (sigma == .25)):
                        df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1B_"+ str(sigma)+"_.csv")
                        df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1B_"+ str(sigma)+"_full_.csv")
                    #df_t["predicted_probs_"+str(mu)] = df_t["predicted_probs"]

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    #setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # performing initial data preprocessing, in this case there is not any
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        #print(scan)

                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] =  data.copy(deep =True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep = True)
                        scan_params["df_copy"] = df_copy.copy(deep = True)
                        scan_params["experiment_name"] = "1B"
                        run_info["scan_params"] = scan_params
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
            
            df_copy = df_copy_within_run.copy(deep =True)
            
            
            #1B_suff
            
            sigma_true = .6
            mu = 0
           
            #mu_suff = 1
            mu_suff = .5 # revisit 1A_suff runs 
            
            sigmas  = [i/10 for i in list(range(0,21))]
            if "1B_suff" in simulations_to_run:
                for sigma in sigmas:
                    #print("running for sigma: " + str(sigma))
                    
                    df_t["true_log_odds"] = copy_log_odds_org

                    # add bias
                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    run_info["_delta"] = _delta
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["sigma_true"] = sigma_true
                    run_info["_Delta"] = _Delta
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = sigma

                    log_odds_to_prob(df_t,"predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]
                    
                    add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, (-1*mu_suff), group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    if ((sigma == 0) or (sigma == .25)):
                        df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1B_suff_"+ str(sigma)+"_.csv")
                        df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1B_suff_"+ str(sigma)+"_full_.csv")
                    #df_t["predicted_probs_"+str(mu)] = df_t["predicted_probs"]

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    #setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # performing initial data preprocessing, in this case there is not any
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        #print(scan)

                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] =  data.copy(deep =True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep = True)
                        scan_params["df_copy"] = df_copy.copy(deep = True)
                        scan_params["experiment_name"] = "1B_suff"
                        run_info["scan_params"] = scan_params
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
            
            # ------------------------------------------------------------------
            # Experiment 2A: for every (sigma, mu) pair, sweep _delta_value, the
            # value handed to the NON_ADDICTIVE_SHIFT outcome helper, and queue
            # one run_info entry per configured scan.
            # ------------------------------------------------------------------

            # work on a fresh deep copy of the per-run snapshot
            df_copy = df_copy_within_run.copy(deep=True)

            mu_values = [0]
            mu_suff = 0
            sigma_predicts = [.20]
            sigma_true = .6

            # 41 grid points: -20*0.05, ..., 20*0.05
            _delta_values = [0.05 * step for step in range(-20, 21)]

            if "2A" in simulations_to_run:
                # product() visits the combinations in the same order as nesting
                # the loops sigma -> mu -> _delta_value
                for sigma, mu, _delta_value in itertools.product(sigma_predicts, mu_values, _delta_values):
                    # restore the stored true log-odds before this iteration's shift
                    df_t["true_log_odds"] = copy_log_odds_org
                    add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, _delta_value, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # add bias
                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this iteration
                    for info_key, info_value in (
                        ("_delta", _delta_value),
                        ("mu", mu),
                        ("mu_suff", mu_suff),
                        ("sigma", sigma),
                        ("sigma_true", sigma_true),
                        ("_Delta", _Delta),
                        ("num_affected", num_affected),
                        ("pr_subset", pr_subset),
                        ("varying_parameter", _delta_value),
                    ):
                        run_info[info_key] = info_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # CHANGE BACK POST EXPERIMENT: only the zero-shift iteration is dumped to disk
                    if _delta_value == 0:
                        dump_stem_2a = folder_path_org+"/_run_num_"+str(run_number)+"_2A_"+ str(_delta_value)+"_sigmapred_"+str(sigma)+"_mu_"+str(mu)
                        df_copy.to_csv(dump_stem_2a+"_.csv")
                        df_t.to_csv(dump_stem_2a+"_full_.csv")

                    # load the scan configuration
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the feature/outcome frame
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # one queued run per scan produced from the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "2A",
                        }
                        run_info["scan_params"] = scan_params
                        # deep copy so later iterations cannot mutate the queued entry
                        run_infos.append(copy.deepcopy(run_info))
                                
             #2B
            
    
            # Experiment 2B: fixed _delta, sweep the sigma passed to the
            # add_bias_shifted_by_mu_NO_SHIFT helper.
            df_copy = df_copy_within_run.copy(deep=True)

            mu = 0
            mu_suff = 0
            _Delta = 0
            _delta = 0.25  # need to make sure this is the right default after a run of 2A
            sigma_true = .6
            sigma = .2
            # 0.0, 0.1, ..., 2.0
            sigmas = [tenth / 10 for tenth in range(21)]
            num_affected = 2
            pr_subset = .5

            if "2B" in simulations_to_run:
                for sigma_value in sigmas:
                    # restore the stored true log-odds, then apply the fixed _delta
                    df_t["true_log_odds"] = copy_log_odds_org
                    add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, _delta, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # add bias
                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma_value, group_ind)

                    # record the settings of this iteration
                    for info_key, info_value in (
                        ("_delta", _delta),
                        ("mu", mu),
                        ("mu_suff", mu_suff),
                        ("sigma", sigma_value),
                        ("sigma_true", sigma_true),
                        ("_Delta", _Delta),
                        ("num_affected", num_affected),
                        ("pr_subset", pr_subset),
                        ("varying_parameter", sigma_value),
                    ):
                        run_info[info_key] = info_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # NOTE(review): the grid above moves in steps of 0.1, so only the
                    # 0 case can match here; .25 is never one of the swept values.
                    if sigma_value in (0, .25):
                        dump_stem_2b = folder_path_org+"/_run_num_"+str(run_number)+"_2B_"+ str(sigma_value)
                        df_copy.to_csv(dump_stem_2b+"_.csv")
                        df_t.to_csv(dump_stem_2b+"_full_.csv")

                    # load the scan configuration
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the feature/outcome frame
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # one queued run per scan produced from the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "2B",
                        }
                        run_info["scan_params"] = scan_params
                        # deep copy so later iterations cannot mutate the queued entry
                        run_infos.append(copy.deepcopy(run_info))

            # ------------------------------------------------------------------
            # Experiment 3: fixed mu, sweep _delta_value (the value handed to the
            # NON_ADDICTIVE_SHIFT outcome helper); predicted probabilities are
            # additionally passed through probs_shift with mu.
            # ------------------------------------------------------------------

            df_copy = df_copy_within_run.copy(deep=True)
            mu = .5  # default might need to change
            mu_suff = 0
            sigma = .2
            sigma_true = .6
            # 41 grid points: -20*0.05, ..., 20*0.05
            _delta_values = [i * 0.05 for i in range(-20, 21)]

            if "3" in simulations_to_run:
                for _delta_value in _delta_values:
                    # restore the stored true log-odds before this iteration's shift
                    df_t["true_log_odds"] = copy_log_odds_org
                    add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, _delta_value, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # add bias
                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this iteration
                    run_info["_delta"] = _delta_value
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["sigma_true"] = sigma_true
                    run_info["_Delta"] = _Delta
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = _delta_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    # predicted probability increase
                    probs_shift(df_t, s_bias, group_ind, "predicted_probs", mu)

                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # Dump the zero-shift iteration. The test has to use the loop
                    # variable: the plain `_delta` name is a fixed setting left over
                    # from other sections (0.25 as assigned for 2B), not the swept
                    # value, so testing it would never select this iteration.
                    if _delta_value == 0:
                        dump_stem_3 = folder_path_org+"/_run_num_"+str(run_number)+"_3_"+ str(_delta_value)
                        df_copy.to_csv(dump_stem_3+"_.csv")
                        df_t.to_csv(dump_stem_3+"_full_.csv")

                    # load the scan configuration
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the feature/outcome frame
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] = data.copy(deep=True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep=True)
                        scan_params["df_copy"] = df_copy.copy(deep=True)
                        scan_params["experiment_name"] = "3"
                        run_info["scan_params"] = scan_params
                        # deep copy so later iterations cannot mutate the queued entry
                        run_infos.append(copy.deepcopy(run_info))

                    # Reset once per _delta_value, after every scan has captured this
                    # iteration's df_copy. Resetting inside the scan loop would hand
                    # every scan after the first a frame without this iteration's
                    # "ReoffendedWithinTwoYears" / "tilde_p" columns.
                    df_copy = df_copy_within_run.copy(deep=True)
            
            
            
            # ------------------------------------------------------------------
            # Experiment 3_suff: fixed mu_suff, sweep _delta_value. After tilde_p
            # is stored, the NON_ADDICTIVE_SHIFT outcome helper is called a second
            # time with -mu_suff and the outcome column is copied again.
            # ------------------------------------------------------------------

            df_copy = df_copy_within_run.copy(deep=True)
            mu = 0
            mu_suff = .5  # default
            sigma = .2
            sigma_true = .6
            # 41 grid points: -20*0.05, ..., 20*0.05
            _delta_values = [i * 0.05 for i in range(-20, 21)]

            if "3_suff" in simulations_to_run:
                for _delta_value in _delta_values:
                    # restore the stored true log-odds before this iteration's shift
                    df_t["true_log_odds"] = copy_log_odds_org
                    add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, _delta_value, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # add bias
                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this iteration
                    run_info["_delta"] = _delta_value
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["sigma_true"] = sigma_true
                    run_info["_Delta"] = _Delta
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = _delta_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # second outcome pass with -mu_suff; the stored tilde_p is left as is
                    add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, (-1*mu_suff), group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # Dump the zero-shift iteration. The test has to use the loop
                    # variable: the plain `_delta` name is a fixed setting left over
                    # from other sections (0.25 as assigned for 2B), not the swept
                    # value, so testing it would never select this iteration.
                    if _delta_value == 0:
                        dump_stem_3_suff = folder_path_org+"/_run_num_"+str(run_number)+"_3_suff_"+ str(_delta_value)
                        df_copy.to_csv(dump_stem_3_suff+"_.csv")
                        df_t.to_csv(dump_stem_3_suff+"_full_.csv")

                    # load the scan configuration
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the feature/outcome frame
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] = data.copy(deep=True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep=True)
                        scan_params["df_copy"] = df_copy.copy(deep=True)
                        scan_params["experiment_name"] = "3_suff"
                        run_info["scan_params"] = scan_params
                        # deep copy so later iterations cannot mutate the queued entry
                        run_infos.append(copy.deepcopy(run_info))


            
            # ------------------------------------------------------------------
            # Experiment 4: fixed mu, sweep _Delta_value, the value handed to
            # add_bias_shifted_log_odds_by_group_ind_choose_new_outcomes.
            # ------------------------------------------------------------------
            mu = 1
            # this section never calls the -mu_suff outcome pass, so record 0
            # instead of whatever an earlier section left in run_info
            mu_suff = 0
            sigma = .2
            sigma_true = .6
            _delta = 0

            # -1.0, -0.9, ..., 1.0
            _Delta_values = [((i/10.0)-1.0) for i in range(0, 21)]

            if "4" in simulations_to_run:

                for _Delta_value in _Delta_values:
                    # restore the stored true log-odds before this iteration's shift
                    df_t["true_log_odds"] = copy_log_odds_org
                    add_bias_shifted_log_odds_by_group_ind_choose_new_outcomes(df_t, _Delta_value, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # add bias
                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this iteration
                    run_info["_delta"] = _delta
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["_Delta"] = _Delta_value
                    run_info["sigma_true"] = sigma_true
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = _Delta_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # Dump only the zero-shift iteration. The test has to use the loop
                    # variable: the plain `_Delta` name is a fixed setting (0), so
                    # testing it would rewrite the same file on every iteration and
                    # leave the last iteration's data under the "0" label.
                    if _Delta_value == 0:
                        dump_stem_4 = folder_path_org+"/_run_num_"+str(run_number)+"_4_"+ str(_Delta_value)
                        df_copy.to_csv(dump_stem_4+"_.csv")
                        df_t.to_csv(dump_stem_4+"_full_.csv")

                    # load the scan configuration
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the feature/outcome frame
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] = data.copy(deep=True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep=True)
                        scan_params["df_copy"] = df_copy.copy(deep=True)
                        scan_params["experiment_name"] = "4"
                        run_info["scan_params"] = scan_params
                        # deep copy so later iterations cannot mutate the queued entry
                        run_infos.append(copy.deepcopy(run_info))
            # Experiment 1E: fixed mu, sweep the sigma_true value passed to
            # produce_true_log_odds; outcomes are redrawn from the resulting
            # true probabilities on every iteration.
            mu = .5
            mu_suff = 0
            sigma = .2
            sigma_true = .6
            _delta = 0
            _Delta = 0

            # 0.0, 0.1, ..., 2.0
            sigma_true_values = [tenth / 10.0 for tenth in range(21)]

            if "1E" in simulations_to_run:
                for sigma_true_value in sigma_true_values:
                    # regenerate true log-odds with this iteration's sigma_true_value
                    log_odds, _, _ = produce_true_log_odds(df_t, cols_t, group_ind, _Delta, sigma_true_value, coefficient_map)
                    df_t["true_log_odds"] = log_odds

                    log_odds_to_prob(df_t, "true_log_odds", "true_probs")

                    # one draw per row via singular_row_bernoulli_draw
                    df_t["ReoffendedWithinTwoYears"] = df_t["true_probs"].apply(singular_row_bernoulli_draw)
                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this iteration
                    for info_key, info_value in (
                        ("_delta", _delta),
                        ("mu", mu),
                        ("mu_suff", mu_suff),
                        ("sigma", sigma),
                        ("_Delta", _Delta),
                        ("sigma_true", sigma_true_value),
                        ("num_affected", num_affected),
                        ("pr_subset", pr_subset),
                        ("varying_parameter", sigma_true_value),
                    ):
                        run_info[info_key] = info_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    probs_shift(df_t, s_bias, group_ind, "predicted_probs", mu)

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # NOTE(review): the grid above moves in steps of 0.1, so only the
                    # 0 case can match here; .25 is never one of the swept values.
                    if sigma_true_value in (0, .25):
                        dump_stem_1e = folder_path_org+"/_run_num_"+str(run_number)+"_1E_"+ str(sigma_true_value)
                        df_copy.to_csv(dump_stem_1e+"_.csv")
                        df_t.to_csv(dump_stem_1e+"_full_.csv")

                    # load the scan configuration
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the feature/outcome frame
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # one queued run per scan produced from the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "1E",
                        }
                        run_info["scan_params"] = scan_params
                        # deep copy so later iterations cannot mutate the queued entry
                        run_infos.append(copy.deepcopy(run_info))
            
            
            
            # Experiment 1E_suff: fixed mu_suff, sweep the sigma_true value passed
            # to produce_true_log_odds. After tilde_p is stored, the
            # NON_ADDICTIVE_SHIFT outcome helper is called with -mu_suff and the
            # outcome column is copied again.
            mu = 0
            mu_suff = .5
            sigma = .2
            sigma_true = .6
            _delta = 0
            _Delta = 0

            # 0.0, 0.1, ..., 2.0
            sigma_true_values = [tenth / 10.0 for tenth in range(21)]

            if "1E_suff" in simulations_to_run:
                for sigma_true_value in sigma_true_values:
                    # restore the stored log-odds first, then regenerate them with
                    # this iteration's sigma_true_value
                    df_t["true_log_odds"] = copy_log_odds_org

                    log_odds, _, _ = produce_true_log_odds(df_t, cols_t, group_ind, _Delta, sigma_true_value, coefficient_map)
                    df_t["true_log_odds"] = log_odds

                    log_odds_to_prob(df_t, "true_log_odds", "true_probs")

                    # one draw per row via singular_row_bernoulli_draw
                    df_t["ReoffendedWithinTwoYears"] = df_t["true_probs"].apply(singular_row_bernoulli_draw)
                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this iteration
                    for info_key, info_value in (
                        ("_delta", _delta),
                        ("mu", mu),
                        ("mu_suff", mu_suff),
                        ("sigma", sigma),
                        ("_Delta", _Delta),
                        ("sigma_true", sigma_true_value),
                        ("num_affected", num_affected),
                        ("pr_subset", pr_subset),
                        ("varying_parameter", sigma_true_value),
                    ):
                        run_info[info_key] = info_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # second outcome pass with -mu_suff; the stored tilde_p is left as is
                    add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, (-1*mu_suff), group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # NOTE(review): the grid above moves in steps of 0.1, so only the
                    # 0 case can match here; .25 is never one of the swept values.
                    if sigma_true_value in (0, .25):
                        dump_stem_1e_suff = folder_path_org+"/_run_num_"+str(run_number)+"_1E_suff_"+ str(sigma_true_value)
                        df_copy.to_csv(dump_stem_1e_suff+"_.csv")
                        df_t.to_csv(dump_stem_1e_suff+"_full_.csv")

                    # load the scan configuration
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the feature/outcome frame
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # one queued run per scan produced from the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "1E_suff",
                        }
                        run_info["scan_params"] = scan_params
                        # deep copy so later iterations cannot mutate the queued entry
                        run_infos.append(copy.deepcopy(run_info))

   

           

            

            # Experiment 2E: fixed _delta, sweep the sigma_true value passed to
            # produce_true_log_odds before the NON_ADDICTIVE_SHIFT outcome helper
            # is applied.

            # work on a fresh deep copy of the per-run snapshot
            df_copy = df_copy_within_run.copy(deep=True)

            mu = 0
            mu_suff = 0
            _Delta = 0
            _delta = 0.25
            sigma_true = .6
            sigma = .2

            # 0.0, 0.1, ..., 2.0
            sigma_true_values = [tenth / 10 for tenth in range(21)]

            if "2E" in simulations_to_run:
                for sigma_true_value in sigma_true_values:
                    # restore the stored log-odds first, then regenerate them with
                    # this iteration's sigma_true_value
                    df_t["true_log_odds"] = copy_log_odds_org

                    log_odds, _, _ = produce_true_log_odds(df_t, cols_t, group_ind, _Delta, sigma_true_value, coefficient_map)
                    df_t["true_log_odds"] = log_odds

                    add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, _delta, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this iteration
                    for info_key, info_value in (
                        ("_delta", _delta),
                        ("mu", mu),
                        ("mu_suff", mu_suff),
                        ("sigma", sigma),
                        ("_Delta", _Delta),
                        ("sigma_true", sigma_true_value),
                        ("num_affected", num_affected),
                        ("pr_subset", pr_subset),
                        ("varying_parameter", sigma_true_value),
                    ):
                        run_info[info_key] = info_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # NOTE(review): the grid above moves in steps of 0.1, so only the
                    # 0 case can match here; .25 is never one of the swept values.
                    if sigma_true_value in (0, .25):
                        dump_stem_2e = folder_path_org+"/_run_num_"+str(run_number)+"_2E_"+ str(sigma_true_value)
                        df_copy.to_csv(dump_stem_2e+"_.csv")
                        df_t.to_csv(dump_stem_2e+"_full_.csv")

                    # load the scan configuration
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the feature/outcome frame
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # one queued run per scan produced from the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "2E",
                        }
                        run_info["scan_params"] = scan_params
                        # deep copy so later iterations cannot mutate the queued entry
                        run_infos.append(copy.deepcopy(run_info))
            
            #5A
            # Sweeps mu over 0.0, 0.1, ..., 2.0 with _delta held at -0.5 and queues one
            # deep-copied run_info per scan for every mu value.

            mu = 0
            _Delta = 0
            _delta = -0.5
            sigma_true = .6
            sigma = .2

            mu_values = [i / 10 for i in range(21)]

            if "5A" in simulations_to_run:
                for mu in mu_values:
                    # reset the column from the saved copy, then regenerate the true log-odds
                    df_t["true_log_odds"] = copy_log_odds_org

                    log_odds, _, _ = produce_true_log_odds(df_t, cols_t, group_ind, _Delta, sigma_true, coefficient_map)
                    df_t["true_log_odds"] = log_odds

                    # apply _delta to the bias subset; the helper name suggests the outcomes
                    # are redrawn as well -- TODO confirm against its definition
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, _delta, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    run_info["_delta"] = _delta
                    run_info["mu"] = mu
                    run_info["sigma"] = sigma
                    run_info["_Delta"] = _Delta
                    # record the sigma_true that was actually passed to produce_true_log_odds
                    # above (previously sigma_true_value, which this experiment never assigns)
                    run_info["sigma_true"] = sigma_true
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = mu

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # keep a snapshot of the generated data for the first sweep value only
                    if mu == 0:
                        df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_5A_"+ str(mu)+"_.csv")
                        df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_5A_"+ str(mu)+"_full_.csv")

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    #setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # performing initial data preprocessing, in this case there is not any
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        # every queued entry gets its own copies of the frames, because df_t and
                        # df_copy are overwritten on the next mu value
                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep = True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep = True),
                            "df_copy": df_copy.copy(deep = True),
                            "experiment_name": "5A",
                        }
                        run_info["scan_params"] = scan_params
                        # run_info is reused across iterations, so snapshot it before queueing
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
            
            #5B

            # 5B: same mu sweep as 5A (0.0, 0.1, ..., 2.0) but with _delta held at +0.5;
            # queues one deep-copied run_info per scan for every mu value.
            mu = 0
            _Delta = 0
            _delta = 0.5
            sigma_true = .6
            sigma = .2

            mu_values = [i / 10 for i in range(21)]

            if "5B" in simulations_to_run:
                for mu in mu_values:
                    # reset the column from the saved copy, then regenerate the true log-odds
                    df_t["true_log_odds"] = copy_log_odds_org

                    log_odds, _, _ = produce_true_log_odds(df_t, cols_t, group_ind, _Delta, sigma_true, coefficient_map)
                    df_t["true_log_odds"] = log_odds

                    # apply _delta to the bias subset; the helper name suggests the outcomes
                    # are redrawn as well -- TODO confirm against its definition
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, _delta, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    run_info["_delta"] = _delta
                    run_info["mu"] = mu
                    run_info["sigma"] = sigma
                    run_info["_Delta"] = _Delta
                    # record the sigma_true that was actually passed to produce_true_log_odds
                    # above (previously sigma_true_value, which this experiment never assigns)
                    run_info["sigma_true"] = sigma_true
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = mu

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # keep a snapshot of the generated data for the first sweep value only
                    if mu == 0:
                        df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_5B_"+ str(mu)+"_.csv")
                        df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_5B_"+ str(mu)+"_full_.csv")

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    #setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # performing initial data preprocessing, in this case there is not any
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        # every queued entry gets its own copies of the frames, because df_t and
                        # df_copy are overwritten on the next mu value
                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep = True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep = True),
                            "df_copy": df_copy.copy(deep = True),
                            "experiment_name": "5B",
                        }
                        run_info["scan_params"] = scan_params
                        # run_info is reused across iterations, so snapshot it before queueing
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
            

            #1CD
            # Varies the bias subset (num_affected x pr_subset) with mu = .5 and no outcome
            # shift; a fresh run_info_t is built and queued per scan for each combination.

            df_copy = df_copy_within_run.copy(deep = True)
            sigma_true = .6
            mu = .5
            mu_suff = 0
            sigma = .2
            _delta = 0
            _Delta = 0

            num_affected_values = list(range(len(cols_t) + 1))
            pr_subset_values = [i / 10.0 for i in range(11)]

            if "1CD" in simulations_to_run:
                for num_affected_value, pr_subset_value in itertools.product(num_affected_values, pr_subset_values):
                    # only combinations with num_affected == 2 or pr_subset == .5 are simulated
                    if num_affected_value != 2 and pr_subset_value != .5:
                        continue

                    # df_copy is updated further down, so each combination starts from the
                    # columns written by the previous one
                    df_t = df_copy.copy(deep = True)
                    cols_t = copy.deepcopy(cols_copy)

                    (df_t, cols_t, key, key_value, group_ind, s_bias, len_p, len_c,
                     true_log_odds, coefficient_map) = pick_protected_class_bias_subset_no_filter_protected_class_given(
                        df_t, key, key_value, cols_t, 150,
                        num_affected_value, pr_subset_value,
                        _Delta, sigma_true, coefficient_map)

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # flag the rows matching every feature/value entry of s_bias
                    if num_affected_value == 0 or pr_subset_value == 0:
                        df_t["in_bias_subset"] = 0
                    else:
                        to_choose = df_t[s_bias.keys()].isin(s_bias).all(axis=1).astype(int)
                        df_t["in_bias_subset"] = to_choose

                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    # metadata stored with every queued scan of this combination
                    run_info_t = {
                        "run_number": run_number,
                        "protected_class": (key, key_value),
                        "_Delta": _Delta,
                        "sigma_true": sigma_true,
                        "coefficient_values": coefficient_map,
                        "selected_bias_subset": s_bias,
                        "group_ind": group_ind,
                        "number_of_rows_protected_class_bias_subset": len_p,
                        "number_of_rows_control_bias_subset": len_c,
                        "_delta": _delta,
                        "mu": mu,
                        "mu_suff": mu_suff,
                        "sigma": sigma,
                        "num_affected": num_affected_value,
                        "pr_subset": pr_subset_value,
                        "varying_parameter": str(num_affected_value)+"_"+str(pr_subset_value),
                    }

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    probs_shift(df_t, s_bias, group_ind, "predicted_probs", mu)

                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # snapshot the generated data for the default combination only
                    if num_affected_value == 2 and pr_subset_value == .5:
                        df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1CD"+"_.csv")
                        df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1CD"+"_full_.csv")

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    #setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # performing initial data preprocessing, in this case there is not any
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep = True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep = True),
                            "df_copy": df_copy.copy(deep = True),
                            "experiment_name": "1CD",
                        }
                        run_info_t["scan_params"] = scan_params
                        # run_info_t is reused for every scan, so queue an independent snapshot
                        run_info_deep_copy = copy.deepcopy(run_info_t)
                        run_infos.append(run_info_deep_copy)
                                
              #1CD_suff
            

            # 1CD_suff: same (num_affected x pr_subset) grid as 1CD, but with mu = 0 and the
            # outcome helper called with -mu_suff (mu_suff = .5) after tilde_p is stored.
            df_copy = df_copy_within_run.copy(deep = True)
            sigma_true = .6
            mu = 0
            mu_suff = .5
            sigma = .2
            _delta = 0
            _Delta = 0

            num_affected_values = list(range(len(cols_t) + 1))
            pr_subset_values = [i / 10.0 for i in range(11)]

            if "1CD_suff" in simulations_to_run:
                for num_affected_value, pr_subset_value in itertools.product(num_affected_values, pr_subset_values):
                    # only combinations with num_affected == 2 or pr_subset == .5 are simulated
                    if num_affected_value != 2 and pr_subset_value != .5:
                        continue

                    # df_copy is updated further down, so each combination starts from the
                    # columns written by the previous one
                    df_t = df_copy.copy(deep = True)
                    cols_t = copy.deepcopy(cols_copy)

                    (df_t, cols_t, key, key_value, group_ind, s_bias, len_p, len_c,
                     true_log_odds, coefficient_map) = pick_protected_class_bias_subset_no_filter_protected_class_given(
                        df_t, key, key_value, cols_t, 150,
                        num_affected_value, pr_subset_value,
                        _Delta, sigma_true, coefficient_map)

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # flag the rows matching every feature/value entry of s_bias
                    if num_affected_value == 0 or pr_subset_value == 0:
                        df_t["in_bias_subset"] = 0
                    else:
                        to_choose = df_t[s_bias.keys()].isin(s_bias).all(axis=1).astype(int)
                        df_t["in_bias_subset"] = to_choose

                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    # metadata stored with every queued scan of this combination
                    run_info_t = {
                        "run_number": run_number,
                        "protected_class": (key, key_value),
                        "_Delta": _Delta,
                        "sigma_true": sigma_true,
                        "coefficient_values": coefficient_map,
                        "selected_bias_subset": s_bias,
                        "group_ind": group_ind,
                        "number_of_rows_protected_class_bias_subset": len_p,
                        "number_of_rows_control_bias_subset": len_c,
                        "_delta": _delta,
                        "mu": mu,
                        "mu_suff": mu_suff,
                        "sigma": sigma,
                        "num_affected": num_affected_value,
                        "pr_subset": pr_subset_value,
                        "varying_parameter": str(num_affected_value)+"_"+str(pr_subset_value),
                    }

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    # tilde_p is taken before the outcome helper below is applied
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # NOTE(review): 2CD only calls this helper when s_bias is non-empty; here it
                    # is called unconditionally -- confirm that is intended for empty subsets
                    add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, (-1 * mu_suff), group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # snapshot the generated data for the default combination only
                    if num_affected_value == 2 and pr_subset_value == .5:
                        df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1CD_suff"+"_.csv")
                        df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1CD_suff"+"_full_.csv")

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    #setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # performing initial data preprocessing, in this case there is not any
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep = True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep = True),
                            "df_copy": df_copy.copy(deep = True),
                            "experiment_name": "1CD_suff",
                        }
                        run_info_t["scan_params"] = scan_params
                        # run_info_t is reused for every scan, so queue an independent snapshot
                        run_info_deep_copy = copy.deepcopy(run_info_t)
                        run_infos.append(run_info_deep_copy)

            # drop the columns written during the sweep by going back to the per-run copy
            df_copy = df_copy_within_run.copy(deep = True)
           # deep_df_copy= df_copy.copy(deep = True)

            #2CD
            # Varies the bias subset (num_affected x pr_subset) with mu = 0 and _delta = 0.25;
            # the outcome helper is only applied when a non-empty s_bias was picked.

            df_copy = df_copy_within_run.copy(deep = True)

            mu = 0
            mu_suff = 0
            _Delta = 0
            _delta = 0.25
            sigma_true = .6
            sigma = .2

            num_affected_values = list(range(len(cols_t) + 1))
            pr_subset_values = [i / 10.0 for i in range(11)]

            if "2CD" in simulations_to_run:
                for num_affected_value, pr_subset_value in itertools.product(num_affected_values, pr_subset_values):
                    # only combinations with num_affected == 2 or pr_subset == .5 are simulated
                    if num_affected_value != 2 and pr_subset_value != .5:
                        continue

                    # df_copy is updated further down, so each combination starts from the
                    # columns written by the previous one
                    df_t = df_copy.copy(deep = True)
                    cols_t = copy.deepcopy(cols_copy)

                    (df_t, cols_t, key, key_value, group_ind, s_bias, len_p, len_c,
                     true_log_odds, coefficient_map) = pick_protected_class_bias_subset_no_filter_protected_class_given(
                        df_t, key, key_value, cols_t, 150,
                        num_affected_value, pr_subset_value,
                        _Delta, sigma_true, coefficient_map)

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    # skip the outcome helper when no feature values were selected
                    if len(s_bias) > 0:
                        add_bias_shifted_log_odds_choose_new_outcomes_NON_ADDICTIVE_SHIFT(df_t, s_bias, _delta, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # flag the rows matching every feature/value entry of s_bias
                    if num_affected_value == 0 or pr_subset_value == 0:
                        df_t["in_bias_subset"] = 0
                    else:
                        to_choose = df_t[s_bias.keys()].isin(s_bias).all(axis=1).astype(int)
                        df_t["in_bias_subset"] = to_choose

                    add_bias_shifted_by_mu_NO_SHIFT(df_t, s_bias, mu, sigma, group_ind)

                    # metadata stored with every queued scan of this combination
                    run_info_t = {
                        "run_number": run_number,
                        "protected_class": (key, key_value),
                        "_Delta": _Delta,
                        "sigma_true": sigma_true,
                        "coefficient_values": coefficient_map,
                        "selected_bias_subset": s_bias,
                        "group_ind": group_ind,
                        "number_of_rows_protected_class_bias_subset": len_p,
                        "number_of_rows_control_bias_subset": len_c,
                        "_delta": _delta,
                        "mu": mu,
                        "sigma": sigma,
                        "mu_suff": mu_suff,
                        "num_affected": num_affected_value,
                        "pr_subset": pr_subset_value,
                        "varying_parameter": str(num_affected_value)+"_"+str(pr_subset_value),
                    }

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # snapshot the generated data for the default combination only
                    if num_affected_value == 2 and pr_subset_value == .5:
                        df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_2CD"+"_.csv")
                        df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_2CD"+"_full_.csv")

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    #setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # performing initial data preprocessing, in this case there is not any
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep = True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep = True),
                            "df_copy": df_copy.copy(deep = True),
                            "experiment_name": "2CD",
                        }
                        run_info_t["scan_params"] = scan_params
                        # run_info_t is reused for every scan, so queue an independent snapshot
                        run_info_deep_copy = copy.deepcopy(run_info_t)
                        run_infos.append(run_info_deep_copy)

            # Restore df_copy from the spare copy and immediately take a new spare, so
            # later code works on a frame without the columns added by the sweeps above.
            df_copy = deep_df_copy
            deep_df_copy = df_copy.copy(deep = True)

            print("Number of runs: "+ str(len(run_infos)))

            # save for hpc running: one pickle per queued run_info, named by run number
            # and position in run_infos
            # make sure the output directory exists; otherwise joblib.dump raises and the
            # whole generation lands in the except handler of this try
            os.makedirs('hpc_info', exist_ok=True)

            for idx, run_info in enumerate(run_infos):

                d = {'timestr': timestr, 'run_info': run_info}

                joblib.dump(d, 'hpc_info/_'+str(run_number)+'_'+str(idx)+'.pkl')

            print("saved run number: "+ str(run_number))
            # drop the reference to the last payload
            d = {}

            # shuffle list

            #random.shuffle(run_infos)

            # running multi-parallel processing

            # reached only when everything above succeeded
            unsuccessful = False
        except Exception as e:
            print("running again : regenerating : error below")
            print(traceback.format_exc())

    # Disabled local scheduler (guarded by `if False`): would keep up to
    # active_worker_constant mp.Process workers running run_wrapper on the queued
    # run_infos, polling their status every 30 seconds.
    if False:
        active_workers = 0
        completed_workers = 0

        active_processes_list = []
        active_worker_constant = 15

        #run_infos = run_infos[:69]
        # loop until the queue is empty and every started worker has been counted as done
        while run_infos or active_workers > completed_workers:
            print(len(run_infos))

            # top up the pool when there is a free slot and work left to hand out
            if len(active_processes_list) < active_worker_constant and run_infos:
                print(len(run_infos))

                # never start more workers than there are queued jobs
                needed_workers = min(active_worker_constant - len(active_processes_list), len(run_infos))

                print("Will create " + str(needed_workers) + " processes")

                # each worker takes the job currently at the end of the queue
                new_workers = []
                for _ in range(needed_workers):
                    new_workers.append(mp.Process(target = run_wrapper, args = ([run_infos.pop()])))

                print(len(run_infos))

                for worker in new_workers:
                    active_workers += 1
                    worker.start()

                    print("starting worker " + str(active_workers))

                active_processes_list = [*active_processes_list, *new_workers]

            # sleep for 30 seconds

            print("sleeping for 30 seconds")

            time.sleep(30)

            # check if workers are alive or not
            if not run_infos:
                print("all jobs are assigned.. waiting for all workers to complete")

            # keep the workers that are still running, account for the finished ones
            replacement_list = []

            for process in active_processes_list:
                if process.is_alive():
                    replacement_list.append(process)
                    continue

                print("there is a complete worker")
                completed_workers += 1
                print("total complete workers : " + str(completed_workers))
                # a non-zero exit code is only reported, the loop keeps going
                if process.exitcode != 0:
                    print("there was an unsuccesfful run!")
                    print(process.exitcode)
                    #sys.exit("issue with run")

                process.terminate()

                print('ended completed process')

            active_processes_list = replacement_list

          
            
        


